package com.yuanzheng.demo.sparkhbase

import org.apache.spark.{Partitioner, SparkConf, SparkContext}

/*自定义分区*/
/** Custom partitioner that routes keys to partitions by the key's numeric value.
  *
  * @param partsNum total number of partitions; keys are assigned via
  *                 `key % partsNum`, normalized to be non-negative.
  */
class initPartitioner(partsNum: Int) extends Partitioner {
  override def numPartitions: Int = partsNum

  override def getPartition(key: Any): Int = {
    // Bug fix: was hard-coded `% 10`, which breaks for partsNum != 10
    // (ids out of range or empty partitions). Use the configured count,
    // and normalize so negative keys still map into [0, partsNum).
    val mod = key.toString.toInt % partsNum
    if (mod < 0) mod + partsNum else mod
  }

  // Partitioners should be comparable so Spark can skip a shuffle when two
  // RDDs are already partitioned the same way.
  override def equals(other: Any): Boolean = other match {
    case p: initPartitioner => p.numPartitions == numPartitions
    case _                  => false
  }

  override def hashCode: Int = numPartitions
}

/** Demo entry point: repartitions the numbers 1..10 into 10 partitions by
  * last digit using [[initPartitioner]], then writes one file per partition.
  *
  * Usage: first program argument is the output directory (defaults to
  * "output" if omitted — the original passed "", which always fails).
  */
object myPartitioner {
  def main(args: Array[String]): Unit = {
    // Provide an app name if the submitter did not; master is expected to
    // come from spark-submit or external configuration.
    val conf = new SparkConf().setIfMissing("spark.app.name", "myPartitioner")
    val sc = new SparkContext(conf)
    try {
      // Simulate source data spread over 5 partitions.
      val data = sc.parallelize(1 to 10, 5)
      // Output path from args; saveAsTextFile rejects an empty path.
      val outputPath = if (args.nonEmpty) args(0) else "output"
      // partitionBy only works on key-value RDDs, so pair each element with
      // a dummy value, repartition by last digit into 10 partitions, then
      // strip the dummy value before writing — one file per partition.
      data
        .map((_, 1))
        .partitionBy(new initPartitioner(10))
        .map(_._1)
        .saveAsTextFile(outputPath)
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
